James Cole - December 2019. Built with R version 3.5.2
This is Notebook contains the final brain age analysis of MS patient data and controls from the UCL cohort, the MAGNIMS consortium and the Imperial College London PET study (n=25). The analysis uses brain-predicted age difference (brain-PAD) to look at brain ageing in the context of MS. The brain-PAD values were generated in PRONTO, using an independent healthy (n=2001) training dataset, and the values were corrected for proportional bias using the intercept and slope of the age by brain-predicted age regression in the training dataset.
Set-up
Clear workspace, set colour palette
rm(list = ls()) ## clear workspace
ms.palette <- c("darkgreen", "darkorange", "red", "blue", "purple") # define MS colour scheme for groups
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.15.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.0.0 survminer_0.4.3 ggpubr_0.2
## [4] magrittr_1.5 survival_2.43-3 stringr_1.3.1
## [7] sjPlot_2.8.2 sjmisc_2.8.2 scales_1.0.0
## [10] RColorBrewer_1.1-2 qwraps2_0.4.1 psych_1.8.12
## [13] pryr_0.1.4 ppcor_1.1 plyr_1.8.4
## [16] plotrix_3.7-4 metafor_2.0-0 MASS_7.3-51.1
## [19] lmerTest_3.0-1 lme4_1.1-21 Matrix_1.2-15
## [22] lm.beta_1.5-1 knitr_1.21 jtools_1.1.1
## [25] hier.part_1.0-4 gtools_3.8.1 gridExtra_2.3
## [28] glmmTMB_0.2.3 ggplot2_3.2.1 ggstance_0.3.1
## [31] emmeans_1.4.2 effects_4.1-4 dplyr_0.8.3
## [34] data.table_1.11.8 cowplot_1.0.0 corrplot_0.84
## [37] car_3.0-2 carData_3.0-2
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-137 cmprsk_2.2-7 insight_0.8.0
## [4] numDeriv_2016.8-1 backports_1.1.3 tools_3.5.2
## [7] TMB_1.7.15 R6_2.3.0 sjlabelled_1.1.2
## [10] DBI_1.0.0 lazyeval_0.2.1 colorspace_1.3-2
## [13] nnet_7.3-12 withr_2.1.2 tidyselect_0.2.5
## [16] mnormt_1.5-5 curl_3.2 compiler_3.5.2
## [19] performance_0.4.3 cli_1.0.1 bayestestR_0.5.1
## [22] survMisc_0.5.5 mvtnorm_1.0-8 digest_0.6.18
## [25] foreign_0.8-71 minqa_1.2.4 rmarkdown_1.11
## [28] rio_0.5.16 pkgconfig_2.0.2 htmltools_0.3.6
## [31] rlang_0.4.0 readxl_1.2.0 generics_0.0.2
## [34] zoo_1.8-4 zip_1.0.0 parameters_0.4.1
## [37] Rcpp_1.0.2 munsell_0.5.0 abind_1.4-5
## [40] lifecycle_0.1.0 stringi_1.2.4 yaml_2.2.0
## [43] grid_3.5.2 parallel_3.5.2 forcats_0.3.0
## [46] crayon_1.3.4 lattice_0.20-38 ggeffects_0.14.0
## [49] haven_2.0.0 splines_3.5.2 sjstats_0.17.8
## [52] hms_0.4.2 zeallot_0.1.0 pillar_1.3.1
## [55] boot_1.3-21 estimability_1.3 effectsize_0.1.1
## [58] codetools_0.2-16 glue_1.3.1 evaluate_0.12
## [61] mitools_2.4 modelr_0.1.4 vctrs_0.2.0
## [64] nloptr_1.2.1 cellranger_1.1.0 gtable_0.2.0
## [67] purrr_0.3.2 km.ci_0.5-2 assertthat_0.2.0
## [70] xfun_0.4 openxlsx_4.1.0 xtable_1.8-3
## [73] broom_0.5.2 survey_3.36 coda_0.19-3
## [76] tibble_2.1.3 KMsurv_0.1-5
Get data from CSV and define longitudinal data.frames
df <- read.csv("MS_brain_age_final_long.csv")
df$subtype <- factor(df$subtype, levels = c("control", "CIS", "RRMS", "SPMS", "PPMS")) # reorder subtype factor to put controls first
df$Cohort <- recode(df$Cohort, JR1 = "Imperial", C0 = "UCL0", C1 = "UCL1", C2 = "UCL2", C3 = "UCL3", C4 = "UCL4" ,C5 = "UCL5", C6 = "UCL6", C7 = "UCL7", A = "Amsterdam", B = "Barcelona", G = "Graz", M = "Milan", R = "Rome", S = "Siena")
df$wb_vol_ratio_icv <- df$WBV / df$ICV
Get data on Brain Age Healthy Training dataset
banc <- read.csv("/Users/jcole/Work/Brain ageing/BANC_2015/BANC_N2003_age_sex_Brain_age_predictions.csv")
Exclude participants with errors in the database & correct time since diagnosis errors
tmp <- df[df$Cohort == 'Amsterdam',] %>% group_by(PatientID) %>% dplyr::summarize(sd = sd(age_at_scan)) %>% arrange(desc(sd)) %>% filter(sd > 2)
excluded_IDs <- sort(tmp$PatientID)
# excluded_IDs <- unique(df[which(df$age_at_scan < df$age_at_baseline_scan1),1])
excluded_scans <- (df %>% filter(str_detect(PatientID, paste(excluded_IDs, collapse = "|"))))["ScanName"]
df <- df %>%
filter(!str_detect(PatientID, paste(excluded_IDs, collapse = "|")))
rm(tmp)
There were 13 subjects with 38 scans excluded in total. Data entry errors in original spreadsheet; age at baseline not consistent within subject.
Load data for lesion filling analysis
## read in data
lesion_df <- read.csv(file = '~/Work/Brain ageing/Collaborations/MS/MAGNIMS/MAGNIMS20171224_final.csv')
lesion_df$subtype <- factor(lesion_df$subtype, levels = c("control", "CIS", "RRMS", "SPMS", "PPMS"))
## create ICV ratio variables
lesion_df <- lesion_df %>%
mutate(ICV = GMV + WMV + CSFV) %>%
mutate(gm_icv_ratio = GMV/ICV) %>%
mutate(wm_icv_ratio = WMV/ICV)
Recode scanner status variable to field strength
Pre-post 2004 data only available for one site.
table(df$scanner_status)
##
## 1.5T 1.5T_post_2004 1.5T_pre_2004 3T
## 2257 19 447 842
df$field_strength <- recode(df$scanner_status, `1.5T_pre_2004` = "1.5T", `1.5T_post_2004` = "1.5T")
table(df$field_strength)
##
## 1.5T 3T
## 2723 842
Generate baseline only data.frame and show data frame structure
df.bl <- df[(df$age_at_baseline_scan1 == df$age_at_scan),] # define baseline data.frame
str(df)
## 'data.frame': 3565 obs. of 34 variables:
## $ PatientID : Factor w/ 1367 levels "AMSTERDAM_4001",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ Cohort : Factor w/ 15 levels "Amsterdam","Barcelona",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ scanner_status : Factor w/ 4 levels "1.5T","1.5T_post_2004",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gender : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 1 1 1 1 ...
## $ control0ppms1cisoforever2other3 : int 3 3 3 3 3 3 3 3 3 3 ...
## $ control0rest1 : Factor w/ 2 levels "control","MS": 2 2 2 2 2 2 2 2 2 2 ...
## $ control0ms1cis2 : Factor w/ 3 levels "CIS","control",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ age_at_baseline_scan1 : num 57.8 57.8 57.8 44.4 44.4 ...
## $ disease_duration_at_baseline_scan1: num 6.8 6.8 6.8 6.38 6.38 ...
## $ DMT_YesNoNA : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 2 2 2 2 ...
## $ DMTYes1 : Factor w/ 2 levels "No treatment",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ EDSSbaseline : num 2.5 2.5 2.5 3 3 3 4 4 4 2.5 ...
## $ NoScans : int 3 3 3 3 3 3 3 3 3 2 ...
## $ FUTimeMax : num 2.32 2.32 2.32 2.35 2.35 ...
## $ ScanName : Factor w/ 3603 levels "AMSTERDAM_4001_baseline_t1",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ age_at_scan : num 57.8 59.1 60.1 44.4 45.5 ...
## $ EDSSatScan : num 2.5 3.5 3 3 2 2.5 4 2.5 4 2.5 ...
## $ BrainAge : num 69.6 73.4 71 58.1 61.8 ...
## $ BrainPAD : num 11.8 14.3 10.8 13.7 16.3 ...
## $ subtype : Factor w/ 5 levels "control","CIS",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ disease_onset_age : num 51 51 51 38 38 ...
## $ disease_duration : num 6.8 6.8 6.8 6.38 6.38 ...
## $ interval : num 0 1.3 2.32 0 1.16 ...
## $ scan_number : Factor w/ 10 levels "scan1","scan10",..: 1 3 4 1 3 4 1 3 4 1 ...
## $ GM_vol : num 0.574 0.563 0.573 0.622 0.605 0.59 0.647 0.643 0.633 0.648 ...
## $ WM_vol : num 0.536 0.532 0.52 0.43 0.423 0.413 0.404 0.399 0.387 0.599 ...
## $ CSF_vol : num 0.352 0.349 0.367 0.293 0.309 0.333 0.355 0.358 0.386 0.271 ...
## $ WBV : num 1.11 1.09 1.09 1.05 1.03 ...
## $ ICV : num 1.46 1.44 1.46 1.34 1.34 ...
## $ gm_vol_ratio_icv : num 0.393 0.39 0.392 0.462 0.453 ...
## $ wm_vol_ratio_icv : num 0.367 0.368 0.356 0.32 0.316 ...
## $ csf_vol_ratio_icv : num 0.241 0.242 0.251 0.218 0.231 ...
## $ wb_vol_ratio_icv : num 0.759 0.758 0.749 0.782 0.769 ...
## $ field_strength : Factor w/ 2 levels "1.5T","3T": 1 1 1 1 1 1 1 1 1 1 ...
Basic stats
Total number of subjects, and by group
The total number of subjects included was n = 1354
The total number of MS patients (including CIS) was n = 1204 and healthy controls was n = 150
Number of scans in total and per group
Total number of scans = 3565
Number of people with 2 or more scans
df.bl %>%
filter(NoScans >= 2) %>%
group_by(control0rest1) %>%
tally()
## # A tibble: 2 x 2
## control0rest1 n
## <fct> <int>
## 1 control 111
## 2 MS 1155
Number of people with 3 or more scans
df.bl %>%
filter(NoScans >= 3) %>%
group_by(control0rest1) %>%
tally()
## # A tibble: 2 x 2
## control0rest1 n
## <fct> <int>
## 1 control 64
## 2 MS 509
Generate demographics table using qwarps2
options(qwraps2_markup = "markdown", digits = 2)
table1 <-
list("N" =
list("Control" = ~ qwraps2::n_perc0(control0rest1 == "control", na_rm = T),
"MS" = ~ qwraps2::n_perc0(control0rest1 == "MS", na_rm = T)),
"N with follow-up" =
list("Yes" = ~ sum(NoScans>1)),
"Gender" =
list("Female" = ~ qwraps2::n_perc0(gender == "female", show_symbol = T),
"Male" = ~ qwraps2::n_perc0(gender == "male", show_symbol = T)),
"Number of scans" =
list("min" = ~ min(NoScans),
"max" = ~ max(NoScans),
"mean (sd)" = ~ qwraps2::mean_sd(NoScans)),
"Age at baseline scan (years)" =
list("min" = ~ min(age_at_baseline_scan1),
"max" = ~ max(age_at_baseline_scan1),
"mean (sd)" = ~ qwraps2::mean_sd(age_at_baseline_scan1)),
"Brain-predicted age at baseline scan (years)" =
list("min" = ~ min(BrainAge),
"max" = ~ max(BrainAge),
"mean (sd)" = ~ qwraps2::mean_sd(BrainAge)),
"Disease duration at baseline (years)" =
list("min" = ~ min(disease_duration, na.rm = T),
"max" = ~ max(disease_duration, na.rm = T),
"mean (sd)" = ~ qwraps2::mean_sd(disease_duration, na_rm = T, show_n = "never")),
"EDSS at baseline " =
list("min" = ~ min(EDSSbaseline, na.rm = T),
"max" = ~ max(EDSSbaseline, na.rm = T),
"mean (sd)" = ~ qwraps2::mean_sd(EDSSbaseline, na_rm = T, show_n = "never")),
"MS subtype" =
list("CIS" = ~ qwraps2::n_perc0(subtype == "CIS", show_symbol = T),
"RRMS" = ~ qwraps2::n_perc0(subtype == "RRMS", show_symbol = T),
"SPMS" = ~ qwraps2::n_perc0(subtype == "SPMS", show_symbol = T),
"PPMS" = ~ qwraps2::n_perc0(subtype == "PPMS", show_symbol = T)),
"Treatment" =
list("Yes" = ~ qwraps2::n_perc0(DMT_YesNoNA == "YES", na_rm = T, show_symbol = T),
"No" = ~ qwraps2::n_perc0(DMT_YesNoNA == "NO", na_rm = T, show_symbol = T),
"Missing" = ~ sum(is.na(DMT_YesNoNA)))
)
print(summary_table(dplyr::group_by(df.bl, control0rest1), table1),
rtitle = "Sample Chararcteristics",
cnames = c("Controls", "MS patients"))
##
##
## |Sample Chararcteristics |Controls |MS patients |
## |:------------------------------------------------|:--------------------|:--------------------|
## |**N** | | |
## | Control |150 (100) |0 (0) |
## | MS |0 (0) |1,204 (100) |
## |**N with follow-up** | | |
## | Yes |111 |1155 |
## |**Gender** | | |
## | Female |82 (55%) |771 (64%) |
## | Male |68 (45%) |433 (36%) |
## |**Number of scans** | | |
## | min |1 |1 |
## | max |10 |7 |
## | mean (sd) |2.82 ± 1.90 |2.61 ± 1.01 |
## |**Age at baseline scan (years)** | | |
## | min |23 |15 |
## | max |66 |68 |
## | mean (sd) |37.29 ± 9.96 |39.41 ± 10.76 |
## |**Brain-predicted age at baseline scan (years)** | | |
## | min |14.5 |7.4 |
## | max |70 |92 |
## | mean (sd) |38.43 ± 11.12 |50.27 ± 14.90 |
## |**Disease duration at baseline (years)** | | |
## | min |Inf |0 |
## | max |-Inf |48 |
## | mean (sd) |NaN ± NA |7.26 ± 7.96 |
## |**EDSS at baseline ** | | |
## | min |Inf |0 |
## | max |-Inf |9 |
## | mean (sd) |NaN ± NA |2.60 ± 1.95 |
## |**MS subtype** | | |
## | CIS |0 (0%) |296 (25%) |
## | RRMS |0 (0%) |677 (56%) |
## | SPMS |0 (0%) |111 (9%) |
## | PPMS |0 (0%) |120 (10%) |
## |**Treatment** | | |
## | Yes |0 (NaN%) |475 (41%) |
## | No |0 (NaN%) |675 (59%) |
## | Missing |150 |54 |
Length of follow-up
Get length of follow-up from longitudinal database.
options(digits = 3) ## return digits option to default
df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
slice(which.max(interval)) %>%
# top_n(n = 1, wt = interval) %>%
group_by(control0rest1) %>%
dplyr::summarise(mean(interval), sd(interval), min(interval), max(interval))
## # A tibble: 2 x 5
## control0rest1 `mean(interval)` `sd(interval)` `min(interval)`
## <fct> <dbl> <dbl> <dbl>
## 1 control 1.97 1.38 0.5
## 2 MS 3.41 3.15 0.233
## # … with 1 more variable: `max(interval)` <dbl>
options(digits = 7) ## return digits option to default
Demographics by MS subtype
options(qwraps2_markup = "markdown", digits = 2)
table1 <-
list("N with follow-up" =
list("Yes" = ~ sum(NoScans>1)),
"Gender" =
list("Female" = ~ qwraps2::n_perc0(gender == "female", show_symbol = T),
"Male" = ~ qwraps2::n_perc0(gender == "male", show_symbol = T)),
"Number of scans" =
list("min" = ~ min(NoScans),
"max" = ~ max(NoScans),
"mean (sd)" = ~ qwraps2::mean_sd(NoScans)),
"Age at baseline scan (years)" =
list("min" = ~ min(age_at_baseline_scan1),
"max" = ~ max(age_at_baseline_scan1),
"mean (sd)" = ~ qwraps2::mean_sd(age_at_baseline_scan1)),
"Brain-predicted age at baseline scan (years)" =
list("min" = ~ min(BrainAge),
"max" = ~ max(BrainAge),
"mean (sd)" = ~ qwraps2::mean_sd(BrainAge)),
"Disease duration at baseline (years)" =
list("min" = ~ min(disease_duration, na.rm = T),
"max" = ~ max(disease_duration, na.rm = T),
"mean (sd)" = ~ qwraps2::mean_sd(disease_duration, na_rm = T, show_n = "never")),
"EDSS at baseline " =
list("min" = ~ min(EDSSbaseline, na.rm = T),
"max" = ~ max(EDSSbaseline, na.rm = T),
"mean (sd)" = ~ qwraps2::mean_sd(EDSSbaseline, na_rm = T, show_n = "never")),
"Treatment" =
list("Yes" = ~ qwraps2::n_perc0(DMT_YesNoNA == "YES", na_rm = T, show_symbol = T),
"No" = ~ qwraps2::n_perc0(DMT_YesNoNA == "NO", na_rm = T, show_symbol = T),
"Missing" = ~ sum(is.na(DMT_YesNoNA)))
)
print(summary_table(dplyr::group_by(df.bl, subtype), table1),
rtitle = "Sample Chararcteristics",
cnames = c("Controls", "CIS", "RRMS", "SPMS", "PPMS"))
##
##
## |Sample Chararcteristics |Controls |CIS |RRMS |SPMS |PPMS |
## |:------------------------------------------------|:--------------------|:--------------------|:--------------------|:--------------------|:--------------------|
## |**N with follow-up** | | | | | |
## | Yes |111 |279 |653 |104 |119 |
## |**Gender** | | | | | |
## | Female |82 (55%) |199 (67%) |453 (67%) |67 (60%) |52 (43%) |
## | Male |68 (45%) |97 (33%) |224 (33%) |44 (40%) |68 (57%) |
## |**Number of scans** | | | | | |
## | min |1 |1 |1 |1 |1 |
## | max |10 |5 |7 |3 |5 |
## | mean (sd) |2.82 ± 1.90 |2.44 ± 0.98 |2.71 ± 1.05 |2.25 ± 0.56 |2.80 ± 1.07 |
## |**Age at baseline scan (years)** | | | | | |
## | min |23 |15 |18 |30 |19 |
## | max |66 |55 |66 |68 |65 |
## | mean (sd) |37.29 ± 9.96 |33.01 ± 8.08 |38.83 ± 9.68 |50.11 ± 9.39 |48.55 ± 10.04 |
## |**Brain-predicted age at baseline scan (years)** | | | | | |
## | min |14.5 |13.7 |7.4 |40.2 |31.0 |
## | max |70 |82 |92 |89 |84 |
## | mean (sd) |38.43 ± 11.12 |37.60 ± 10.01 |51.33 ± 13.32 |67.36 ± 10.42 |59.77 ± 10.90 |
## |**Disease duration at baseline (years)** | | | | | |
## | min |Inf |0.0 |0.0 |2.5 |1.0 |
## | max |-Inf |18 |42 |48 |27 |
## | mean (sd) |NaN ± NA |0.52 ± 1.50 |7.67 ± 7.31 |17.44 ± 9.04 |6.65 ± 5.63 |
## |**EDSS at baseline ** | | | | | |
## | min |Inf |0 |0 |3 |2 |
## | max |-Inf |4.5 |6.5 |9.0 |8.0 |
## | mean (sd) |NaN ± NA |1.36 ± 1.02 |2.12 ± 1.40 |5.83 ± 1.20 |5.10 ± 1.32 |
## |**Treatment** | | | | | |
## | Yes |0 (NaN%) |60 (20%) |356 (56%) |51 (50%) |8 (7%) |
## | No |0 (NaN%) |234 (80%) |285 (44%) |52 (50%) |104 (93%) |
## | Missing |150 |2 |36 |8 |8 |
Length of follow-up
Get length of follow-up from longitudinal database.
options(digits = 3) ## return digits option to default
df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
slice(which.max(interval)) %>%
# top_n(n = 1, wt = interval) %>%
group_by(subtype) %>%
dplyr::summarise(mean(interval), sd(interval), min(interval), max(interval))
## # A tibble: 5 x 5
## subtype `mean(interval)` `sd(interval)` `min(interval)` `max(interval)`
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 control 1.97 1.38 0.5 6
## 2 CIS 3.63 4.04 0.233 15
## 3 RRMS 3.57 3.10 0.447 15
## 4 SPMS 2.43 1.12 0.505 5.50
## 5 PPMS 2.86 1.65 0.833 6
options(digits = 7) ## return digits option to default
Correlations between demographics and clinical variables
Use corrplot package.
cor.mat <- cbind(df.bl$age_at_scan, df.bl$disease_onset_age, df.bl$disease_duration, df.bl$EDSSatScan)
colnames(cor.mat) <- c("Age at scan", "Age at diagnosis", "Time since diagnosis", "EDSS at scan")
corrplot(cor(cor.mat, use = "pairwise"), type = "upper", method = "color", addCoef.col = T, tl.col = "black", diag = F)

Age histograms
p <- ggplot() + xlab("Age (years)") + xlim(c(18,90)) + theme_cowplot()
banc_plot <- p + geom_histogram(aes(x = age), fill = "darkgoldenrod2", binwidth = 2, data = banc) + labs(title = "Training data", subtitle = "Healthy participants")
ms_plot <- p + geom_histogram(aes(x = age_at_scan), fill = "firebrick", binwidth = 2, data = subset(df.bl, df.bl$control0rest1 == "MS")) + labs(title = "Test data", subtitle = "MS and CIS patients")
hc_plot <- p + geom_histogram(aes(x = age_at_scan), fill = "darkgreen", binwidth = 2, data = subset(df.bl, df.bl$control0rest1 == "control")) + labs(title = "Test data", subtitle = "Healthy controls")
p2 <- cowplot::plot_grid(banc_plot, ms_plot, hc_plot, labels = "AUTO", nrow = 1)
p2

cowplot::save_plot("~/Work/Articles/Brain age/MS/figures/BANC_MAGNIMS_age_histograms.pdf", p2, base_asp = 2.5)
Baseline brain-age analysis
describeBy(df.bl$BrainPAD, df.bl$control0rest1, mat = T, digits = 3) # brain-PAD by MS patient vs. controls
## item group1 vars n mean sd median trimmed mad min
## X11 1 control 1 150 1.135 6.738 0.642 1.173 6.510 -16.507
## X12 2 MS 1 1204 10.875 10.237 9.443 10.246 9.684 -18.181
## max range skew kurtosis se
## X11 22.090 38.597 0.032 0.072 0.550
## X12 48.666 66.847 0.600 0.315 0.295
Evalute potential covariates
table(df.bl$Cohort, df.bl$field_strength)
##
## 1.5T 3T
## Amsterdam 193 0
## Barcelona 62 90
## UCL0 0 21
## UCL1 148 6
## UCL2 0 16
## UCL3 0 147
## UCL4 0 35
## UCL5 50 0
## UCL6 59 0
## UCL7 34 0
## Graz 174 0
## Imperial 0 25
## Milan 60 54
## Rome 71 0
## Siena 109 0
fit <- lm(BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + ICV + field_strength + Cohort, data = df.bl)
anova(fit)
## Analysis of Variance Table
##
## Response: BrainPAD
## Df Sum Sq Mean Sq F value Pr(>F)
## poly(age_at_baseline_scan1, 2) 2 445 222.73 2.3603 0.094788 .
## gender 1 741 741.38 7.8563 0.005138 **
## ICV 1 12 11.93 0.1264 0.722278
## field_strength 1 693 692.58 7.3392 0.006833 **
## Cohort 14 17705 1264.64 13.4013 < 2.2e-16 ***
## Residuals 1334 125886 94.37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hierarchical partitioning of brain-PAD
a <- lm(BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + wb_vol_ratio_icv + field_strength + Cohort, data = df.bl)
summary(a)
##
## Call:
## lm(formula = BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender +
## wb_vol_ratio_icv + field_strength + Cohort, data = df.bl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.641 -4.486 -0.235 4.181 32.055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 142.6576 3.7241 38.307 < 2e-16 ***
## poly(age_at_baseline_scan1, 2)1 -164.4998 8.7462 -18.808 < 2e-16 ***
## poly(age_at_baseline_scan1, 2)2 -11.9125 7.0751 -1.684 0.092471 .
## gendermale -1.1586 0.4034 -2.872 0.004142 **
## wb_vol_ratio_icv -164.4643 4.5651 -36.027 < 2e-16 ***
## field_strength3T -1.7440 0.8259 -2.112 0.034907 *
## CohortBarcelona -3.6913 0.9170 -4.025 6.01e-05 ***
## CohortUCL0 0.7741 1.8032 0.429 0.667782
## CohortUCL1 -2.1112 0.7838 -2.694 0.007157 **
## CohortUCL2 -0.8549 2.0068 -0.426 0.670182
## CohortUCL3 -3.0754 1.1239 -2.736 0.006297 **
## CohortUCL4 -5.4675 1.5208 -3.595 0.000336 ***
## CohortUCL5 -1.7564 1.1165 -1.573 0.115914
## CohortUCL6 -1.7447 1.0339 -1.688 0.091731 .
## CohortUCL7 -7.3669 1.3185 -5.587 2.79e-08 ***
## CohortGraz -4.9941 0.7700 -6.486 1.24e-10 ***
## CohortImperial -5.4688 1.6925 -3.231 0.001263 **
## CohortMilan 4.6601 0.9195 5.068 4.58e-07 ***
## CohortRome -0.6036 0.9675 -0.624 0.532844
## CohortSiena 4.1644 0.8522 4.887 1.15e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.916 on 1334 degrees of freedom
## Multiple R-squared: 0.5614, Adjusted R-squared: 0.5552
## F-statistic: 89.87 on 19 and 1334 DF, p-value: < 2.2e-16
h.p <- hier.part(y = df.bl$BrainPAD,
xcan = df.bl[c("age_at_baseline_scan1","gender", "wb_vol_ratio_icv", "field_strength", "Cohort")],
gof = "Rsqu",
barplot = FALSE)
round(h.p$IJ, 3)
## I J Total
## age_at_baseline_scan1 0.056 -0.056 0.000
## gender 0.004 0.001 0.005
## wb_vol_ratio_icv 0.389 -0.044 0.345
## field_strength 0.006 -0.002 0.004
## Cohort 0.105 0.011 0.116
round(h.p$I.perc, 1)
## I
## age_at_baseline_scan1 10.0
## gender 0.7
## wb_vol_ratio_icv 69.3
## field_strength 1.1
## Cohort 18.8
bar.colors <- c("darkgoldenrod2", "grey")
barplot(t(as.matrix(h.p$IJ[,1:2])),
names.arg = c("Age", "Sex", "NBV", "Field strength", "Cohort"),
col = bar.colors,
ylim = c(0,0.4),
xlab = "Model predictors",
ylab = expression(paste("Brain-PAD variance explained R"^"2")))
legend("topright", c("Unique variance", "Shared variance"), fill = bar.colors, bty = "n", title = "Legend")

Results suggest that age, age^2, normalised brain volume (AKA wb_vol_ratio_icv), gender, Cohort and field strength are appropriate covariates. # Predict brain-PAD based on group Function to run a linear mixed effect (LME) model adjusting for: fixed effects of age, age^2, gender and field strength; random effects of cohort.
run_lm <- function(var, data) {
m1 <- lmer(BrainPAD ~ data[[var]] +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
(1|Cohort),
data = data,
control = lmerControl(optimizer = "Nelder_Mead"))
return(m1)
}
Main effect of group (MS vs. controls)
fit <- run_lm("control0rest1", df.bl)
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 9034.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.1381 -0.6419 -0.0329 0.6186 4.7265
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 8.643 2.940
## Residual 45.856 6.772
## Number of obs: 1354, groups: Cohort, 15
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 129.6722 3.9073 1183.7942 33.187 < 2e-16 ***
## data[[var]]MS 6.0432 0.7698 1265.8978 7.850 8.82e-15 ***
## poly(age_at_scan, 2)1 -166.3679 8.5264 1347.0000 -19.512 < 2e-16 ***
## poly(age_at_scan, 2)2 -15.3559 6.9240 1342.9439 -2.218 0.0267 *
## gendermale -0.9364 0.3956 1338.6090 -2.367 0.0181 *
## field_strength3T -1.8549 0.7352 408.4067 -2.523 0.0120 *
## wb_vol_ratio_icv -156.8503 4.5515 1346.9993 -34.461 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) d[[]]M p(__,2)1 p(__,2)2 gndrml fld_3T
## dat[[vr]]MS -0.366
## ply(g__,2)1 -0.430 -0.040
## ply(g__,2)2 0.049 -0.063 0.010
## gendermale -0.240 0.070 0.121 0.049
## fld_strng3T -0.026 0.012 -0.009 -0.025 -0.051
## wb_vl_rt_cv -0.961 0.212 0.466 -0.042 0.209 -0.051
Estimated marginal means
Generate EMMs for all MS/CIS and healthy controls. Need to re-fit same model as above so that lme4 object can be read by emmeans.
fit <- lmer(BrainPAD ~ control0rest1 +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
(1|Cohort),
data = df.bl,
control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit, pairwise ~ control0rest1)
## $emmeans
## control0rest1 emmean SE df lower.CL upper.CL
## control 4.25 1.04 36.8 2.14 6.36
## MS 10.29 0.83 15.8 8.53 12.05
##
## Results are averaged over the levels of: gender, field_strength
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## control - MS -6.04 0.774 1261 -7.810 <.0001
##
## Results are averaged over the levels of: gender, field_strength
## Degrees-of-freedom method: kenward-roger
Meta-analysis looking at all the separate cohorts with MS/CIS patients and controls
Check which cohorts contain healthy controls and patients.
table(df.bl$subtype, df.bl$Cohort)
##
## Amsterdam Barcelona UCL0 UCL1 UCL2 UCL3 UCL4 UCL5 UCL6 UCL7 Graz
## control 0 0 0 0 0 82 16 17 17 10 0
## CIS 4 84 0 75 0 0 0 0 0 24 95
## RRMS 131 63 21 77 0 28 0 33 0 0 69
## SPMS 36 5 0 2 7 23 0 0 0 0 9
## PPMS 22 0 0 0 9 14 19 0 42 0 1
##
## Imperial Milan Rome Siena
## control 8 0 0 0
## CIS 0 11 1 2
## RRMS 16 66 66 107
## SPMS 1 24 4 0
## PPMS 0 13 0 0
Forest plot of results
plot.forest %<a-% {
forest(meta.results, ilab = cbind(meta.df$MS_n, meta.df$control_n), ilab.xpos = c(-30,-23), slab = meta.df$Cohort, digits = 1, xlab = "MS vs. Healthy control group mean difference", steps = 6, col = "red", cex = 1.25, pch = 22, bg = "blue"); text(c(-40, -30, -23), 7.6, c("Cohort", "MS n", "HC n"), font = 2, cex = 1.25)
}
plot.forest

cairo_pdf("plots/forest_plot.pdf", 6,5)
plot.forest
dev.off()
## quartz_off_screen
## 2
Linear regression analysis in cohort UCL3
Includes covariates: age, age^2, gender.
summary(lm(BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 2) + gender + wb_vol_ratio_icv, data = subset(df.bl, df.bl$Cohort == "UCL3")))
##
## Call:
## lm(formula = BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1,
## 2) + gender + wb_vol_ratio_icv, data = subset(df.bl, df.bl$Cohort ==
## "UCL3"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.7410 -3.8178 0.1203 4.0826 14.3160
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 98.7584 9.9120 9.963 < 2e-16 ***
## control0rest1MS 9.2958 1.3312 6.983 1.04e-10 ***
## poly(age_at_baseline_scan1, 2)1 -46.2627 7.5736 -6.108 9.27e-09 ***
## poly(age_at_baseline_scan1, 2)2 0.2071 6.3500 0.033 0.974
## gendermale -0.7397 1.1030 -0.671 0.504
## wb_vol_ratio_icv -121.3744 12.0628 -10.062 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.3 on 141 degrees of freedom
## Multiple R-squared: 0.6724, Adjusted R-squared: 0.6608
## F-statistic: 57.89 on 5 and 141 DF, p-value: < 2.2e-16
Brain-PAD by MS subtype
Using a linear mixed effect (LME) model, to compare subtypes. This analysis excluded controls. Adjusting for age, age^2, gender, field strength, normalised brain volume and cohort.
subtypes <- run_lm("subtype", subset(df.bl, df.bl$subtype != "control"))
summary(subtypes)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 7969.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8602 -0.6520 -0.0126 0.6072 4.5227
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 5.353 2.314
## Residual 43.862 6.623
## Number of obs: 1204, groups: Cohort, 15
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 126.1933 3.9556 1133.0602 31.902 < 2e-16 ***
## data[[var]]RRMS 5.1218 0.5834 927.4206 8.779 < 2e-16 ***
## data[[var]]SPMS 6.5847 0.9381 1127.8282 7.019 3.85e-12 ***
## data[[var]]PPMS 4.5069 1.0695 370.4821 4.214 3.15e-05 ***
## poly(age_at_scan, 2)1 -172.6828 8.6976 1193.8555 -19.854 < 2e-16 ***
## poly(age_at_scan, 2)2 -16.5325 6.8839 1189.0349 -2.402 0.0165 *
## gendermale -0.7640 0.4149 1188.4981 -1.841 0.0658 .
## field_strength3T -0.6255 0.7130 230.2779 -0.877 0.3813
## wb_vol_ratio_icv -150.8232 4.7561 1194.8731 -31.711 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) d[[]]R d[[]]S d[[]]P p(__,2)1 p(__,2)2 gndrml fld_3T
## dt[[v]]RRMS -0.282
## dt[[v]]SPMS -0.334 0.560
## dt[[v]]PPMS -0.194 0.498 0.478
## ply(g__,2)1 -0.365 -0.063 -0.182 -0.212
## ply(g__,2)2 0.056 0.051 -0.108 -0.096 0.055
## gendermale -0.218 0.010 -0.007 -0.104 0.129 0.059
## fld_strng3T -0.073 0.202 0.058 0.120 -0.021 -0.010 -0.053
## wb_vl_rt_cv -0.974 0.162 0.258 0.098 0.398 -0.060 0.197 -0.016
anova(subtypes)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## data[[var]] 3701 1234 3 773.45 28.1258 < 2e-16 ***
## poly(age_at_scan, 2) 17365 8683 2 1191.15 197.9519 < 2e-16 ***
## gender 149 149 1 1188.50 3.3909 0.06581 .
## field_strength 34 34 1 230.28 0.7696 0.38126
## wb_vol_ratio_icv 44108 44108 1 1194.87 1005.6082 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Brain-PAD estimated marginal means for subtypes
Generate EMMs for all MS subtypes.
fit_subtype <- lmer(BrainPAD ~ subtype +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
(1|Cohort),
# data = subset(df.bl, df.bl$control0rest1 != "control"),
data = df.bl,
control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit_subtype, pairwise ~ subtype)
## $emmeans
## subtype emmean SE df lower.CL upper.CL
## control 4.27 0.901 49.8 2.47 6.08
## CIS 6.43 0.802 32.7 4.79 8.06
## RRMS 11.57 0.691 19.7 10.13 13.02
## SPMS 13.02 0.955 71.9 11.11 14.92
## PPMS 10.44 0.974 63.8 8.49 12.38
##
## Results are averaged over the levels of: gender, field_strength
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## control - CIS -2.15 0.916 724 -2.350 0.1305
## control - RRMS -7.30 0.811 868 -9.001 <.0001
## control - SPMS -8.74 1.013 1227 -8.631 <.0001
## control - PPMS -6.16 0.952 1296 -6.474 <.0001
## CIS - RRMS -5.15 0.575 1100 -8.949 <.0001
## CIS - SPMS -6.59 0.916 1295 -7.194 <.0001
## CIS - PPMS -4.01 1.011 659 -3.966 0.0008
## RRMS - SPMS -1.44 0.764 1342 -1.891 0.3227
## RRMS - PPMS 1.14 0.871 700 1.303 0.6893
## SPMS - PPMS 2.58 0.986 1110 2.618 0.0678
##
## Results are averaged over the levels of: gender, field_strength
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
Save table for importing into Word
x <- as.data.frame(emmeans(object = fit_subtype, pairwise ~ subtype)$contrasts)
write.table(format(x, digits = 3), file = "~/Work/Articles/Brain age/MS/Annals Neurology/pairwise_brain_PAD.txt", sep = ",", quote = F, row.names = F)
Plot the EMMs and confidence intervals
plot_model(fit_subtype, type = "pred", terms = c("subtype")) +
theme_cowplot()

Brain-PAD boxplot by MS subtype
# calculate Ns
control_n <- with(df.bl, table(subtype))["control"]
cis_n <- with(df.bl, table(subtype))["CIS"]
rrms_n <- with(df.bl, table(subtype))["RRMS"]
spms_n <- with(df.bl, table(subtype))["SPMS"]
ppms_n <- with(df.bl, table(subtype))["PPMS"]
plot.pryr %<a-% {
with(df.bl, ehplot(BrainPAD, groups = subtype, ylim = c(-20,52), pch = 21, bg = ms.palette[as.numeric(subtype)], box = T, offset = 0.1, intervals = 50, xlab = "Groups", ylab = "Brain-PAD (years)", main = "Baseline Brain-PAD by study group")) +
abline(0,0, lty = 2) +
text(1,51, paste("N = ", control_n, sep = ""), cex = 0.8) +
text(2,51, paste("N = ", cis_n, sep = ""), cex = 0.8) +
text(3,51, paste("N = ", rrms_n, sep = ""), cex = 0.8) +
text(4,51, paste("N = ", spms_n, sep = ""), cex = 0.8) +
text(5,51, paste("N = ", ppms_n, sep = ""), cex = 0.8)
}
plot.pryr

## integer(0)
cairo_pdf("plots/MS_UCL_MAGNIMS_combined_group_Brain-PAD.pdf", 8,8)
plot.pryr
## integer(0)
dev.off()
## quartz_off_screen
## 2
Brain-PAD boxplot by cohort and by MS subtype
Also, Estimate marginal means plot
Uses sjPlot to calculated EMMs from linear regression model
p1 <- ggplot(df.bl, aes(x = subtype, y = BrainPAD, fill = subtype)) +
geom_boxplot() +
facet_wrap(df.bl$Cohort, nrow = 1) +
geom_hline(aes(yintercept = 0), lty = 2) +
theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "none") +
scale_fill_manual(values = ms.palette)
fit <- lm(BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 2) + gender + field_strength + wb_vol_ratio_icv + Cohort, data = df.bl)
p2 <- plot_model(fit, type = "emm", terms = c("Cohort", "control0rest1"), colors = c("darkgreen", "firebrick"), show.legend = F) + theme_cowplot()
plot_grid(list(p1,p2))

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/brainPAD_cohort_subtype_boxplot_EMM_plot.pdf", width = 15, height = 10, useDingbats = FALSE)
Brain-PAD boxplot by MS subtype in cohort UCL3 only
# calculate Ns
C3.bl <- df.bl %>% filter(Cohort == "UCL3")
C3.bl$subtype <- factor(C3.bl$subtype)
control_n <- with(C3.bl, table(subtype))["control"]
rrms_n <- with(C3.bl, table(subtype))["RRMS"]
spms_n <- with(C3.bl, table(subtype))["SPMS"]
ppms_n <- with(C3.bl, table(subtype))["PPMS"]
plot.pryr %<a-% {
with(C3.bl, ehplot(BrainPAD, groups = subtype, ylim = c(-20,52), pch = 21, bg = ms.palette[-2][as.numeric(subtype)], box = T, offset = 0.1, intervals = 50, xlab = "Groups", ylab = "Brain-PAD (years)", main = "Baseline Brain-PAD by study group")) +
abline(0,0, lty = 2) +
text(1,51, paste("N = ", control_n, sep = ""), cex = 0.8) +
text(2,51, paste("N = ", rrms_n, sep = ""), cex = 0.8) +
text(3,51, paste("N = ", spms_n, sep = ""), cex = 0.8) +
text(4,51, paste("N = ", ppms_n, sep = ""), cex = 0.8)
}
plot.pryr

## integer(0)
cairo_pdf("plots/MS_cohort_C3_Brain-PAD.pdf", 8,8)
plot.pryr
## integer(0)
dev.off()
## quartz_off_screen
## 2
Cohort UCL3 statistics
Use OLS regression, no random effects necessary.
fit_UCL3 <- lm(BrainPAD ~ subtype +
poly(age_at_scan, 2) +
gender +
wb_vol_ratio_icv,
data = subset(df.bl, df.bl$Cohort == "UCL3"))
anova(fit_UCL3)
## Analysis of Variance Table
##
## Response: BrainPAD
## Df Sum Sq Mean Sq F value Pr(>F)
## subtype 3 7250.8 2416.9 60.4945 < 2e-16 ***
## poly(age_at_scan, 2) 2 492.7 246.3 6.1655 0.00272 **
## gender 1 146.8 146.8 3.6745 0.05730 .
## wb_vol_ratio_icv 1 3638.6 3638.6 91.0720 < 2e-16 ***
## Residuals 139 5553.5 40.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(object = fit_UCL3, pairwise ~ subtype)
## $emmeans
## subtype emmean SE df lower.CL upper.CL
## control 3.04 1.00 139 1.07 5.02
## RRMS 11.68 1.34 139 9.04 14.32
## SPMS 13.66 1.70 139 10.29 17.02
## PPMS 12.75 1.82 139 9.14 16.35
##
## Results are averaged over the levels of: gender
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## control - RRMS -8.638 1.55 139 -5.579 <.0001
## control - SPMS -10.611 1.95 139 -5.455 <.0001
## control - PPMS -9.702 2.00 139 -4.859 <.0001
## RRMS - SPMS -1.974 1.93 139 -1.022 0.7371
## RRMS - PPMS -1.064 2.15 139 -0.496 0.9599
## SPMS - PPMS 0.909 2.21 139 0.412 0.9764
##
## Results are averaged over the levels of: gender
## P value adjustment: tukey method for comparing a family of 4 estimates
Brain-PAD by subtype descriptive statistics
with(df.bl, describeBy(BrainPAD, subtype, mat = T, digits = 1)) # brain-PAD by MS patient subtypes and controls
## item group1 vars n mean sd median trimmed mad min max range
## X11 1 control 1 150 1.1 6.7 0.6 1.2 6.5 -16.5 22.1 38.6
## X12 2 CIS 1 296 4.6 7.2 4.3 4.3 6.5 -13.1 31.5 44.5
## X13 3 RRMS 1 677 12.5 10.0 11.1 12.0 9.4 -18.2 48.7 66.8
## X14 4 SPMS 1 111 17.3 10.5 16.9 17.1 11.1 -6.7 43.9 50.6
## X15 5 PPMS 1 120 11.2 10.3 10.3 10.7 10.6 -5.3 46.4 51.7
## skew kurtosis se
## X11 0.0 0.1 0.6
## X12 0.5 0.9 0.4
## X13 0.5 0.3 0.4
## X14 0.2 -0.4 1.0
## X15 0.5 0.1 0.9
Lesion filling
To establish whether using the FSL lesion filling software influences brain-predicted age values. This analysis was conducted only in UCL patients (n = 575).
with(subset(lesion_df, lesion_df$subtype != "control"), describeBy(filled_brain_age, subtype))
##
## Descriptive statistics by group
## group: control
## NULL
## --------------------------------------------------------
## group: CIS
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 8 42.4 6.18 42.57 42.4 7.87 34.3 49.69 15.39 -0.09 -1.98
## se
## X1 2.18
## --------------------------------------------------------
## group: RRMS
## vars n mean sd median trimmed mad min max range skew
## X1 1 382 54.78 11.46 54.79 54.78 12.12 24.26 87.53 63.27 -0.03
## kurtosis se
## X1 -0.44 0.59
## --------------------------------------------------------
## group: SPMS
## vars n mean sd median trimmed mad min max range skew
## X1 1 119 64.62 9.24 65.96 65.41 8.45 40.33 81.64 41.31 -0.74
## kurtosis se
## X1 -0.03 0.85
## --------------------------------------------------------
## group: PPMS
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 66 62.2 10.13 59.63 61.67 10.57 44.4 84.13 39.72 0.46 -0.61
## se
## X1 1.25
Correlation between brain-predicted age from filled and unfilled images: Pearson’s r = 0.994. Median absolute error (MAE) between brain-predicted age from filled and unfilled images = 0.3717 years. Mean difference between brain-predicted age from filled and unfilled images = 0.28 ± 1.29 years.
Lesion filled vs. unfilled scatterplot
ggplot(data = subset(lesion_df, lesion_df$subtype != "control"), aes(x = brain_age, y = filled_brain_age)) +
geom_abline(slope = 1) +
geom_point(pch = 21, aes(fill = subtype), size = 2) +
labs(x = "Unfilled brain-predicted age (years)", y = "Lesion-filled brain-predicted age (years)") +
xlim(c(20,90)) +
scale_fill_manual(values = ms.palette[-1]) +
theme_cowplot() + theme(legend.position = c(0.8, 0.2))
## Warning: Removed 1441 rows containing missing values (geom_point).

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/lesion_filling_brain_age_plot.pdf", width = 5, height = 5, useDingbats = FALSE)
## Warning: Removed 1441 rows containing missing values (geom_point).
Lesion filled vs. unfilled Bland-Altman plot
mean.diff <- mean(lesion_df$brain_age - lesion_df$filled_brain_age, na.rm = T)
sd.diff <- sd(lesion_df$brain_age - lesion_df$filled_brain_age, na.rm = T)
ggplot(data = subset(lesion_df, lesion_df$subtype != "control"), aes(x = ((brain_age + filled_brain_age)/2), y = brain_age - filled_brain_age)) +
geom_abline(slope = 0, lty = 2) +
geom_point(pch = 21, aes(fill = subtype), size = 2) +
geom_hline(yintercept = mean.diff, color = "darkgoldenrod1", lwd = 1) + # mean difference line
geom_hline(yintercept = mean.diff + 1.96*sd.diff, color = "darkgoldenrod2", lty = 2) + # upper 95% line
geom_hline(yintercept = mean.diff - 1.96*sd.diff, color = "darkgoldenrod2", lty = 2) + # lower 95% line
# geom_smooth(method = "lm", level = 0.95, color = "black", lwd = 0.3) +
labs(x = "Mean of filled/unfilled brain-predicted age (years)", y = "Unfilled - Lesion-filled brain-predicted age (years)") +
# ylim(c(-20,20)) +
scale_fill_manual(values = ms.palette[-1]) +
theme_cowplot() + theme(legend.position = c(0.1, 0.8)) +
annotate("text", x = 25, y = mean.diff + 1.96*sd.diff + 0.5, label = "+1.96*SD", color = "darkgoldenrod2") +
annotate("text", x = 25, y = mean.diff - 1.96*sd.diff - 0.5, label = "-1.96*SD", color = "darkgoldenrod2") +
annotate("text", x = 27.5, y = mean.diff + 0.8, label = "Mean difference", color = "darkgoldenrod2")
## Warning: Removed 1441 rows containing missing values (geom_point).

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/lesion_filling_brain_age_BA_plot.pdf", width = 8, height = 5, useDingbats = FALSE)
## Warning: Removed 1441 rows containing missing values (geom_point).
Correlates of brain-PAD at baseline
EDSS score, an index of disability
LME model accounting for fixed effects of age at baseline, age^2, gender, field strength, normalised brain volume and random effects of Cohort.
summary(run_lm("EDSSbaseline", df.bl))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 7813.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9891 -0.6338 -0.0466 0.6106 4.8192
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 8.433 2.904
## Residual 45.055 6.712
## Number of obs: 1174, groups: Cohort, 14
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 133.8648 4.1224 1061.6482 32.473 < 2e-16 ***
## data[[var]] 0.6369 0.1416 1116.0234 4.497 7.61e-06 ***
## poly(age_at_scan, 2)1 -186.5305 9.3459 1165.0388 -19.959 < 2e-16 ***
## poly(age_at_scan, 2)2 -25.6213 7.3294 1160.9933 -3.496 0.000491 ***
## gendermale -0.9577 0.4216 1159.8982 -2.272 0.023284 *
## field_strength3T -1.7902 0.7337 396.6175 -2.440 0.015124 *
## wb_vol_ratio_icv -156.4270 4.9237 1165.1857 -31.770 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.374
## ply(g__,2)1 -0.333 -0.224
## ply(g__,2)2 0.082 -0.127 0.052
## gendermale -0.221 0.007 0.108 0.049
## fld_strng3T -0.035 0.031 -0.012 -0.028 -0.051
## wb_vl_rt_cv -0.972 0.290 0.366 -0.076 0.196 -0.041
When predicting brain-PAD in a LME model, the effect of EDSS at baseline beta = 0.64, 95% CI = 0.36, 0.91, p = 7.605e-06.
Testing for a polynomial fit of EDSS baseline
fit.edss.poly <- lmer(BrainPAD ~
poly(EDSSbaseline,2) +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
(1|Cohort),
data = subset(df.bl, !is.na(df.bl$EDSSbaseline)),
control = lmerControl(optimizer = "Nelder_Mead"))
summary(fit.edss.poly)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ poly(EDSSbaseline, 2) + poly(age_at_scan, 2) + gender +
## field_strength + wb_vol_ratio_icv + (1 | Cohort)
## Data: subset(df.bl, !is.na(df.bl$EDSSbaseline))
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 7796.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9258 -0.6284 -0.0525 0.6069 4.8209
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 8.422 2.902
## Residual 44.993 6.708
## Number of obs: 1174, groups: Cohort, 14
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 135.3383 3.9934 1050.1573 33.890 < 2e-16 ***
## poly(EDSSbaseline, 2)1 43.3875 9.4512 1113.9207 4.591 4.92e-06 ***
## poly(EDSSbaseline, 2)2 -11.3812 7.0429 1165.1705 -1.616 0.106366
## poly(age_at_scan, 2)1 -175.1301 8.7644 1164.1102 -19.982 < 2e-16 ***
## poly(age_at_scan, 2)2 -23.2697 6.9159 1159.2976 -3.365 0.000791 ***
## gendermale -0.9541 0.4213 1158.8600 -2.265 0.023711 *
## field_strength3T -1.7994 0.7332 395.4624 -2.454 0.014548 *
## wb_vol_ratio_icv -156.3987 4.9203 1164.1857 -31.786 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) p(EDSS,2)1 p(EDSS,2)2 p(__,2)1 p(__,2)2 gndrml fld_3T
## pl(EDSS,2)1 -0.294
## pl(EDSS,2)2 0.002 -0.061
## ply(g__,2)1 -0.363 -0.224 0.026
## ply(g__,2)2 0.073 -0.122 -0.076 0.040
## gendermale -0.228 0.007 -0.005 0.108 0.050
## fld_strng3T -0.033 0.030 0.008 -0.012 -0.029 -0.051
## wb_vl_rt_cv -0.975 0.289 -0.004 0.367 -0.076 0.196 -0.042
Non-parametric correlation between brain-PAD and EDSS at baseline
with(df.bl, cor.test(BrainPAD, EDSSbaseline, method = "spearman"))
## Warning in cor.test.default(BrainPAD, EDSSbaseline, method = "spearman"):
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: BrainPAD and EDSSbaseline
## S = 200790000, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2554724
Plot EDSS histogram by subtype
ggplot(subset(df.bl, !is.na(df.bl$EDSSbaseline)), aes(EDSSbaseline, fill = subtype)) +
scale_fill_manual(values = ms.palette[-1]) +
geom_histogram(bins = 20) +
facet_wrap(~ subtype) +
theme_cowplot()

Test for interaction between subtype and EDSS on brain-PAD
fit.edss <- lmer(BrainPAD ~ EDSSbaseline * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control"))
round(as.matrix(anova(fit.edss)["EDSSbaseline:subtype",]),3)
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## EDSSbaseline:subtype 138.445 46.148 3 1157.787 1.079 0.357
Use simple slopes from jtools to extract adjusted slopes for each subtype. Need to fit model without age^2 as poly() is incompatible with sim_slopes().
fit.edss2 <- lmer(BrainPAD ~ EDSSbaseline * subtype + age_at_baseline_scan1 + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control"))
sim_slopes(fit.edss2, pred = "EDSSbaseline", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS
##
## Slope of EDSSbaseline when subtype = CIS:
## Est. S.E. df p
## 0.15 0.40 0.37 0.71
##
## Slope of EDSSbaseline when subtype = RRMS:
## Est. S.E. df p
## 0.79 0.21 3.78 0.00
##
## Slope of EDSSbaseline when subtype = SPMS:
## Est. S.E. df p
## 0.20 0.53 0.38 0.71
##
## Slope of EDSSbaseline when subtype = PPMS:
## Est. S.E. df p
## 0.27 0.46 0.58 0.56
Use interact_plot() from jtools to plot the adjusted slopes per group.
edss.plot <- interact_plot(fit.edss2, pred = "EDSSbaseline", modx = "subtype",
plot.points = T,
interval = T,
vary.lty = T,
facet.modx = T,
x.label = "EDSS", y.label = "Brain-PAD (years)",
point.size = 1,
modx.labels = c("CIS", "RRMS", "SPMS", "PPMS")) +
geom_hline(yintercept = 0, lty = 2) +
scale_fill_manual(values = ms.palette[2:5], name = "MS subtype") +
scale_color_manual(values = ms.palette[2:5], name = "MS subtype") +
theme_cowplot()
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/EDSS_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
Age at diagnosis
LME accounting for fixed effects of age at baseline, age^2, gender, and random effects of Cohort and field strength. Exclude CIS patients and healthy controls.
summary(run_lm("disease_onset_age", subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS")))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 5946.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7293 -0.6049 -0.0352 0.6113 4.2337
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 10.89 3.300
## Residual 42.68 6.533
## Number of obs: 900, groups: Cohort, 14
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 134.74403 4.06450 766.40479 33.151 < 2e-16
## data[[var]] -0.16109 0.03591 891.85866 -4.486 8.19e-06
## poly(age_at_scan, 2)1 -117.99406 11.97374 892.65321 -9.854 < 2e-16
## poly(age_at_scan, 2)2 -13.86667 6.67536 883.28360 -2.077 0.03806
## gendermale -0.18540 0.47044 881.60719 -0.394 0.69361
## field_strength3T 2.78246 0.96484 189.74899 2.884 0.00438
## wb_vol_ratio_icv -151.57879 5.24069 888.63597 -28.923 < 2e-16
##
## (Intercept) ***
## data[[var]] ***
## poly(age_at_scan, 2)1 ***
## poly(age_at_scan, 2)2 *
## gendermale
## field_strength3T **
## wb_vol_ratio_icv ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.005
## ply(g__,2)1 -0.278 -0.743
## ply(g__,2)2 0.047 0.036 -0.033
## gendermale -0.217 -0.086 0.125 0.067
## fld_strng3T -0.097 0.062 -0.055 -0.004 -0.042
## wb_vl_rt_cv -0.926 -0.287 0.492 -0.060 0.199 -0.010
When predicting brain-PAD in a LME model, the effect of age at diagnosis at baseline beta = -0.16, 95% CI = -0.23, -0.09, p = 8.1884e-06.
Test for interaction between subtype and age at diagnosis on brain-PAD:
fit.age <- lmer(BrainPAD ~ disease_onset_age * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS"))
round(as.matrix(anova(fit.age)["disease_onset_age:subtype",]),3)
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## disease_onset_age:subtype 51.344 25.672 2 877.313 0.6 0.549
Use simple slopes from jtools to extract adjusted slopes for each subtype.
fit.age2 <- lmer(BrainPAD ~ disease_onset_age * subtype + age_at_baseline_scan1 + gender + field_strength + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS"))
sim_slopes(fit.age2, pred = "disease_onset_age", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS
##
## Slope of disease_onset_age when subtype = RRMS:
## Est. S.E. df p
## -0.37 0.05 -6.75 0.00
##
## Slope of disease_onset_age when subtype = SPMS:
## Est. S.E. df p
## -0.57 0.09 -6.39 0.00
##
## Slope of disease_onset_age when subtype = PPMS:
## Est. S.E. df p
## -0.51 0.10 -5.35 0.00
age.plot <- interact_plot(fit.age2, pred = "disease_onset_age", modx = "subtype",
plot.points = T, interval = T, vary.lty = T, facet.modx = T,
x.label = "Age at clincial diagnosis (years)", y.label = "Brain-PAD (years)",
point.size = 1,
modx.labels = c("RRMS", "SPMS", "PPMS")) +
geom_hline(yintercept = 0, lty = 2) +
scale_fill_manual(values = ms.palette[3:5], name = "MS subtype") +
scale_color_manual(values = ms.palette[3:5], name = "MS subtype") +
theme_cowplot()
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/diagnosis_age_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
Time since diagnosis
LME accounting for fixed effects of age at baseline, age^2 gender, and random effects of Cohort and field strength. Exclude controls, CIS patients and anyone with a time since diagnosis = 0.
summary(run_lm("disease_duration_at_baseline_scan1", subset(df.bl, df.bl$subtype != "CIS" & df.bl$disease_duration > 0)))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 5677.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7809 -0.6013 -0.0357 0.6135 4.1849
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 10.79 3.284
## Residual 43.44 6.591
## Number of obs: 857, groups: Cohort, 14
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 128.02789 4.39562 774.82522 29.126 < 2e-16
## data[[var]] 0.15577 0.03642 849.23218 4.277 2.11e-05
## poly(age_at_scan, 2)1 -165.16100 8.38789 849.56458 -19.690 < 2e-16
## poly(age_at_scan, 2)2 -13.78655 6.71476 839.99378 -2.053 0.04037
## gendermale -0.13429 0.48410 838.71263 -0.277 0.78153
## field_strength3T 2.65557 0.97568 181.83682 2.722 0.00712
## wb_vol_ratio_icv -151.65033 5.34831 845.15051 -28.355 < 2e-16
##
## (Intercept) ***
## data[[var]] ***
## poly(age_at_scan, 2)1 ***
## poly(age_at_scan, 2)2 *
## gendermale
## field_strength3T **
## wb_vol_ratio_icv ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.339
## ply(g__,2)1 -0.266 -0.317
## ply(g__,2)2 0.058 -0.024 -0.008
## gendermale -0.222 0.082 0.062 0.060
## fld_strng3T -0.067 -0.053 0.009 -0.006 -0.043
## wb_vl_rt_cv -0.971 0.283 0.303 -0.063 0.187 -0.014
When predicting brain-PAD in a LME model, the effect of time since diagnosis at baseline beta = 0.16, 95% CI = 0.08, 0.23, p = 2.1098e-05.
Test for interaction between subtype and time since diagnosis on brain-PAD
fit.time <- lmer(BrainPAD ~ disease_duration_at_baseline_scan1 * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS" & df.bl$disease_duration > 0))
round(as.matrix(anova(fit.time)["disease_duration_at_baseline_scan1:subtype",]),3)
## Sum Sq Mean Sq NumDF DenDF
## disease_duration_at_baseline_scan1:subtype 84.065 42.032 2 797.105
## F value Pr(>F)
## disease_duration_at_baseline_scan1:subtype 0.966 0.381
Use simple slopes from jtools to extract adjusted slopes for each subtype.
fit.time2 <- lmer(BrainPAD ~ disease_duration_at_baseline_scan1 * subtype + age_at_baseline_scan1 + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS" & df.bl$disease_duration > 0))
sim_slopes(fit.time2, pred = "disease_duration_at_baseline_scan1", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS
##
## Slope of disease_duration_at_baseline_scan1 when subtype = RRMS:
## Est. S.E. df p
## 0.19 0.04 4.29 0.00
##
## Slope of disease_duration_at_baseline_scan1 when subtype = SPMS:
## Est. S.E. df p
## 0.08 0.07 1.06 0.29
##
## Slope of disease_duration_at_baseline_scan1 when subtype = PPMS:
## Est. S.E. df p
## 0.02 0.13 0.12 0.90
time.plot <- interact_plot(fit.time2, pred = "disease_duration_at_baseline_scan1", modx = "subtype",
plot.points = T, interval = T, vary.lty = T, facet.modx = T, point.size = 1,
x.label = "Time since clinical diagnosis (years)", y.label = "Brain-PAD (years)",
modx.labels = c("RRMS", "SPMS", "PPMS")) +
geom_hline(yintercept = 0, lty = 2) +
scale_fill_manual(values = ms.palette[3:5], name = "MS subtype") +
scale_color_manual(values = ms.palette[3:5], name = "MS subtype") +
theme_cowplot()
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/diagnosis_years_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
Arrange EDSS, age at diagnosis and time since diagnosis plots
Use cowplot package.
cowplot::plot_grid(edss.plot, age.plot, time.plot, labels = "AUTO", ncol = 1)

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/clinical_brain-PAD_plots.pdf", height = 15, width = 8, useDingbats = FALSE)
Brain age adjusted for brain volumes
Function to get standardised beta coefficients from lmer, from stack exchange: https://stats.stackexchange.com/questions/123366/lmer-standardized-regression-coefficients
lm.beta.lmer <- function(mod) {
b <- fixef(mod)[-1]
sd.x <- apply(getME(mod,"X")[,-1],2,sd)
sd.y <- sd(getME(mod,"y"))
b*sd.x/sd.y
}
Baseline clinical measures, adjusting for whole-brain volume as ratio of ICV, along with standard covariates.
EDSS_baseline <- lm.beta.lmer(lmer(EDSSbaseline ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
Time_since_diagnosis <- lm.beta.lmer(model <- lmer(disease_duration ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
Age_at_onset <- lm.beta.lmer(lmer(disease_onset_age ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
round(as.data.frame(rbind(EDSS_baseline, Time_since_diagnosis, Age_at_onset)),3)
## BrainPAD wb_vol_ratio_icv poly(age_at_scan, 2)1
## EDSS_baseline 0.141 -0.143 0.265
## Time_since_diagnosis 0.245 -0.102 0.407
## Age_at_onset -0.202 0.086 0.774
## poly(age_at_scan, 2)2 gendermale field_strength3T
## EDSS_baseline 0.096 0.000 -0.068
## Time_since_diagnosis 0.074 -0.041 -0.062
## Age_at_onset -0.065 0.028 0.044
DMT
Effects of disease modifying treatment on brain-PAD
table(df.bl$DMT_YesNoNA, df.bl$subtype)
##
## control CIS RRMS SPMS PPMS
## NO 0 234 285 52 104
## YES 0 60 356 51 8
Descriptive statistics brain-PAD by DMT status
with(subset(df.bl, df.bl$control0rest1 == "MS"), describeBy(BrainPAD, DMT_YesNoNA))
##
## Descriptive statistics by group
## group: NO
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 675 8.21 9.12 6.99 7.63 8.49 -13.07 47.21 60.28 0.7 0.79
## se
## X1 0.35
## --------------------------------------------------------
## group: YES
## vars n mean sd median trimmed mad min max range skew
## X1 1 475 14.22 10.66 13.28 13.78 10.87 -18.18 48.67 66.85 0.37
## kurtosis se
## X1 -0.01 0.49
Fit LME model
fit_DMT <- lmer(BrainPAD ~
subtype +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
DMT_YesNoNA +
(1|Cohort),
data = df.bl,
control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit_DMT, pairwise ~ DMT_YesNoNA)
## $emmeans
## DMT_YesNoNA emmean SE df lower.CL upper.CL
## NO 10.2 0.696 21.3 8.76 11.6
## YES 12.1 0.755 27.6 10.56 13.7
##
## Results are averaged over the levels of: subtype, gender, field_strength
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## NO - YES -1.91 0.49 1072 -3.898 0.0001
##
## Results are averaged over the levels of: subtype, gender, field_strength
## Degrees-of-freedom method: kenward-roger
EDSS progression survival analysis
Based on Arman Eshaghi’s code used in Eshaghi et al., 2018 Annals of Neurology. Function for characterising EDSS progression, based on different rates of change and different baseline EDSS values
is_sustained_progression <- function(edssAtStart, change){
sustainedProgression <- FALSE
#if start of edss is 0, 1.5 increase is considered sustained progression
if ((edssAtStart < 1) & (change >= 1.5)) {
sustainedProgression <- TRUE
}
#if start of edss is 6 or above, 0.5 increase is considered sustained progression
else if ((edssAtStart >= 6) & (change >= 0.5 )) {
sustainedProgression <- TRUE
}
#if start of edss is more than zero but less than 6, sustained progression is by 1 increase in edss
else if ((edssAtStart >= 1 ) & (edssAtStart < 6 ) & (change >= 1 )) {
sustainedProgression <- TRUE
}
return(sustainedProgression)
}
Determine change in EDSS from baseline to last follow-up
Select latest EDSS per subject in subjects with 2 or more assessments
y1 <- df %>%
filter(!subtype == "control") %>%
filter(!is.na(EDSSbaseline)) %>%
group_by(PatientID) %>%
top_n(1, interval) %>%
ungroup() %>%
dplyr::rename(latest_EDSS = EDSSatScan) %>%
dplyr::select(PatientID, interval, EDSSbaseline, latest_EDSS) %>%
filter(!is.na(latest_EDSS)) %>%
mutate(EDSSchange = latest_EDSS - EDSSbaseline)
# apply Arman's function
y1$EDSS_progression <- mapply(is_sustained_progression, y1$EDSSbaseline, y1$EDSSchange)
## get baseline brain-PAD and brain volumetric measures
y2 <- df %>%
filter(!subtype == "control") %>%
filter(interval == 0) %>%
filter(!is.na(EDSSbaseline)) %>%
dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
dplyr::rename(GM_vol_baseline = GM_vol) %>%
dplyr::rename(WM_vol_baseline = WM_vol) %>%
dplyr::rename(CSF_vol_baseline = CSF_vol) %>%
dplyr::rename(WBV_baseline = WBV) %>%
dplyr::rename(ICV_baseline = ICV) %>%
dplyr::select(-one_of('interval'))
y3 <- right_join(y1, y2, by = c("PatientID")) %>%
filter(!is.na(latest_EDSS))
Numbers of EDSS progressors
The number of MS patients with >= 2 EDSS scores was 1143.
table(y3$EDSS_progression) # calculate proportion of patients who progress
##
## FALSE TRUE
## 840 303
round(prop.table(table(y3$EDSS_progression)),3) # calculate percentage of patients who progress
##
## FALSE TRUE
## 0.735 0.265
Run survival analysis
Using brain-PAD with normal covariates
# creating new response function
S <- Surv(time = y3$interval, event = y3$EDSS_progression)
# Brain-PAD, age, sex model
surv.model <- coxph(S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,2) + gender + field_strength, data = y3)
summary(surv.model)
## Call:
## coxph(formula = S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,
## 2) + gender + field_strength, data = y3)
##
## n= 1143, number of events= 303
##
## coef exp(coef) se(coef) z
## BrainPAD_baseline 2.301e-02 1.023e+00 5.801e-03 3.966
## poly(age_at_baseline_scan1, 2)1 1.106e+01 6.330e+04 2.117e+00 5.223
## poly(age_at_baseline_scan1, 2)2 8.872e-01 2.428e+00 2.072e+00 0.428
## gendermale 2.317e-01 1.261e+00 1.202e-01 1.927
## field_strength3T 3.472e-01 1.415e+00 1.705e-01 2.037
## Pr(>|z|)
## BrainPAD_baseline 7.32e-05 ***
## poly(age_at_baseline_scan1, 2)1 1.76e-07 ***
## poly(age_at_baseline_scan1, 2)2 0.6686
## gendermale 0.0540 .
## field_strength3T 0.0417 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BrainPAD_baseline 1.023 0.9772581 1.01170 1.035e+00
## poly(age_at_baseline_scan1, 2)1 63298.139 0.0000158 998.79076 4.012e+06
## poly(age_at_baseline_scan1, 2)2 2.428 0.4118114 0.04182 1.410e+02
## gendermale 1.261 0.7931507 0.99607 1.596e+00
## field_strength3T 1.415 0.7066566 1.01319 1.976e+00
##
## Concordance= 0.648 (se = 0.017 )
## Rsquare= 0.051 (max possible= 0.953 )
## Likelihood ratio test= 60.25 on 5 df, p=1e-11
## Wald test = 62.84 on 5 df, p=3e-12
## Score (logrank) test = 65.45 on 5 df, p=9e-13
# Check the assumptions of proportional hazards are met
cox.zph(surv.model)
## rho chisq p
## BrainPAD_baseline 0.145349 7.54e+00 0.006035
## poly(age_at_baseline_scan1, 2)1 -0.068122 1.44e+00 0.229532
## poly(age_at_baseline_scan1, 2)2 0.166316 7.78e+00 0.005279
## gendermale -0.000737 1.67e-04 0.989698
## field_strength3T -0.163475 9.14e+00 0.002502
## GLOBAL NA 2.22e+01 0.000485
Check that the brain-PAD line is horizontal.
plot(cox.zph(surv.model)[1])
The hazard ratio for brain-PAD on time-to-disease-progression was HR (95% CI) = 1.023, 1.012, 1.035. That means for every additional +1 year of brain-PAD there is a 1.023% increase in the likelihood of EDSS progression. Extrapolated over 5 years of brain-PAD, there is a 1.122 increase in the likelihood of EDSS progression.
Adding normalised brain volume as a covariate
# creating new response function
S <- Surv(time = y3$interval, event = y3$EDSS_progression)
# Brain-PAD, age, sex model
surv.model <- coxph(S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv, data = y3)
summary(surv.model)
## Call:
## coxph(formula = S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,
## 2) + gender + field_strength + wb_vol_ratio_icv, data = y3)
##
## n= 1143, number of events= 303
##
## coef exp(coef) se(coef) z
## BrainPAD_baseline -4.807e-03 9.952e-01 7.826e-03 -0.614
## poly(age_at_baseline_scan1, 2)1 2.097e+00 8.144e+00 2.707e+00 0.775
## poly(age_at_baseline_scan1, 2)2 -2.016e-01 8.174e-01 2.086e+00 -0.097
## gendermale 4.873e-02 1.050e+00 1.252e-01 0.389
## field_strength3T 3.860e-01 1.471e+00 1.712e-01 2.254
## wb_vol_ratio_icv -9.416e+00 8.141e-05 1.728e+00 -5.448
## Pr(>|z|)
## BrainPAD_baseline 0.5390
## poly(age_at_baseline_scan1, 2)1 0.4385
## poly(age_at_baseline_scan1, 2)2 0.9230
## gendermale 0.6971
## field_strength3T 0.0242 *
## wb_vol_ratio_icv 5.11e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BrainPAD_baseline 9.952e-01 1.005e+00 9.801e-01 1.011e+00
## poly(age_at_baseline_scan1, 2)1 8.144e+00 1.228e-01 4.044e-02 1.640e+03
## poly(age_at_baseline_scan1, 2)2 8.174e-01 1.223e+00 1.369e-02 4.880e+01
## gendermale 1.050e+00 9.524e-01 8.215e-01 1.342e+00
## field_strength3T 1.471e+00 6.798e-01 1.052e+00 2.058e+00
## wb_vol_ratio_icv 8.141e-05 1.228e+04 2.750e-06 2.410e-03
##
## Concordance= 0.661 (se = 0.018 )
## Rsquare= 0.074 (max possible= 0.953 )
## Likelihood ratio test= 87.89 on 6 df, p=<2e-16
## Wald test = 96.81 on 6 df, p=<2e-16
## Score (logrank) test = 98.77 on 6 df, p=<2e-16
# Check the assumptions of proportional hazards are met
cox.zph(surv.model)
## rho chisq p
## BrainPAD_baseline 0.0804 2.224 0.135892
## poly(age_at_baseline_scan1, 2)1 -0.0893 2.830 0.092543
## poly(age_at_baseline_scan1, 2)2 0.1581 7.318 0.006827
## gendermale -0.0284 0.268 0.604561
## field_strength3T -0.1616 8.835 0.002955
## wb_vol_ratio_icv -0.0612 1.328 0.249135
## GLOBAL NA 24.602 0.000405
Time-to-EDSS progression Kaplan-Meier plots
Run survplot on survival object
Based on a median split of brain-PAD.
km.plot.df <- y3 %>% mutate(split_BrainPAD = ntile(BrainPAD_baseline, 2))
str(km.plot.df)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1143 obs. of 39 variables:
## $ PatientID : Factor w/ 1367 levels "AMSTERDAM_4001",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ interval : num 2.32 2.35 2.32 1.23 1.23 ...
## $ EDSSbaseline.x : num 2.5 3 4 2.5 7.5 4 2 6 2 6.5 ...
## $ latest_EDSS : num 3 2.5 4 4 7.5 4 2 6 2.5 6.5 ...
## $ EDSSchange : num 0.5 -0.5 0 1.5 0 0 0 0 0.5 0 ...
## $ EDSS_progression : logi FALSE FALSE FALSE TRUE FALSE FALSE ...
## $ Cohort : Factor w/ 15 levels "Amsterdam","Barcelona",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ scanner_status : Factor w/ 4 levels "1.5T","1.5T_post_2004",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gender : Factor w/ 2 levels "female","male": 2 1 1 1 1 1 1 1 2 2 ...
## $ control0ppms1cisoforever2other3 : int 3 3 3 3 3 3 3 3 3 1 ...
## $ control0rest1 : Factor w/ 2 levels "control","MS": 2 2 2 2 2 2 2 2 2 2 ...
## $ control0ms1cis2 : Factor w/ 3 levels "CIS","control",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ age_at_baseline_scan1 : num 57.8 44.4 28.3 44.8 39 ...
## $ disease_duration_at_baseline_scan1: num 6.8 6.38 6.29 6.79 19.02 ...
## $ DMT_YesNoNA : Factor w/ 2 levels "NO","YES": 1 1 2 2 2 1 2 2 2 1 ...
## $ DMTYes1 : Factor w/ 2 levels "No treatment",..: 1 1 2 2 2 1 2 2 2 1 ...
## $ EDSSbaseline.y : num 2.5 3 4 2.5 7.5 4 2 6 2 6.5 ...
## $ NoScans : int 3 3 3 2 2 3 3 3 3 2 ...
## $ FUTimeMax : num 2.32 2.35 2.32 1.23 1.23 ...
## $ ScanName : Factor w/ 3603 levels "AMSTERDAM_4001_baseline_t1",..: 1 4 7 10 12 14 17 20 23 26 ...
## $ age_at_scan : num 57.8 44.4 28.3 44.8 39 ...
## $ EDSSatScan : num 2.5 3 4 2.5 7.5 4 2 6 2 6.5 ...
## $ BrainAge : num 69.6 58.1 52.4 54 73 ...
## $ BrainPAD_baseline : num 11.75 13.7 24.09 9.22 33.99 ...
## $ subtype : Factor w/ 5 levels "control","CIS",..: 3 3 3 3 4 3 3 3 3 5 ...
## $ disease_onset_age : num 51 38 22 38 20 ...
## $ disease_duration : num 6.8 6.38 6.29 6.79 19.02 ...
## $ scan_number : Factor w/ 10 levels "scan1","scan10",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ GM_vol_baseline : num 0.574 0.622 0.647 0.648 0.575 0.609 0.641 0.56 0.712 0.635 ...
## $ WM_vol_baseline : num 0.536 0.43 0.404 0.599 0.394 0.493 0.503 0.382 0.496 0.534 ...
## $ CSF_vol_baseline : num 0.352 0.293 0.355 0.271 0.45 0.259 0.396 0.384 0.337 0.47 ...
## $ WBV_baseline : num 1.11 1.052 1.051 1.247 0.969 ...
## $ ICV_baseline : num 1.46 1.34 1.41 1.52 1.42 ...
## $ gm_vol_ratio_icv : num 0.393 0.462 0.46 0.427 0.405 ...
## $ wm_vol_ratio_icv : num 0.367 0.32 0.287 0.395 0.278 ...
## $ csf_vol_ratio_icv : num 0.241 0.218 0.252 0.179 0.317 ...
## $ wb_vol_ratio_icv : num 0.759 0.782 0.748 0.821 0.683 ...
## $ field_strength : Factor w/ 2 levels "1.5T","3T": 1 1 1 1 1 1 1 1 1 1 ...
## $ split_BrainPAD : int 2 2 2 1 2 1 2 1 2 2 ...
S <- Surv(time = km.plot.df$interval, event = km.plot.df$EDSS_progression) # response function
# survplot <- ggsurvplot(survfit(S ~ split_BrainPAD, data = km.plot.df),
# surv.plot.height = 0.9,
# ggtheme = theme_survminer(),
# risk.table = T,
# cumcensor = F,
# conf.int = F,
# palette = c("blue", "red"),
# censor = F,
# xlab = "Time (years)",
# legend.labs = c("Brain-PAD < median", "Brain-PAD > median"))
# survplot
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/KM_brain-PAD_plot.pdf", height = 8, width = 8, print(survplot, newpage = FALSE), useDingbats = FALSE)
The median value = 9.68 years.
Longitudinal brain-age analysis
The total number of people with two or more scans was n = 1266. This included n = 1155 MS patients and n = 111 controls.
With 3 or more scans, there were n = 509 MS patients and n = 64 controls.
Determine change in brain-PAD from baseline to last follow-up
## select latest brain-PAD per subject in subjects with 2 or more assessments
z1 <- df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
top_n(1, interval) %>%
ungroup() %>%
dplyr::rename(latest_BrainPAD = BrainPAD) %>%
dplyr::rename(latest_wb_vol_ratio_icv = wb_vol_ratio_icv) %>%
dplyr::select(PatientID, interval, latest_BrainPAD, latest_wb_vol_ratio_icv)
## baseline brain-PAD
z2 <- df %>%
filter(NoScans >= 2) %>%
filter(interval == 0) %>%
filter(!is.na(BrainPAD)) %>%
dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
#dplyr::rename(GM_vol_baseline = GM_vol) %>%
#dplyr::rename(WM_vol_baseline = WM_vol) %>%
dplyr::rename(wb_vol_baseline = wb_vol_ratio_icv) %>%
dplyr::select(-one_of('interval'))
## calculate change in brain-PAD between baseline and latest brain-PAD
z3 <- right_join(z1, z2, by = c("PatientID")) %>%
mutate(BrainPAD_change = latest_BrainPAD - BrainPAD_baseline) %>%
mutate(wb_vol_ratio_icv_change = latest_wb_vol_ratio_icv - wb_vol_baseline)
Mean annualised rates of change in brain-PAD per group
describeBy(z3$BrainPAD_change/z3$interval, z3$subtype, mat = T, digits = 2)
## item group1 vars n mean sd median trimmed mad min max
## X11 1 control 1 111 0.03 2.02 -0.09 -0.12 1.14 -4.65 9.86
## X12 2 CIS 1 279 0.23 2.20 0.12 0.21 1.21 -11.74 11.49
## X13 3 RRMS 1 652 0.29 1.70 0.17 0.22 1.07 -12.24 16.21
## X14 4 SPMS 1 104 -0.29 1.60 -0.24 -0.27 0.93 -6.11 7.01
## X15 5 PPMS 1 119 0.40 1.52 0.30 0.40 1.38 -4.66 4.24
## range skew kurtosis se
## X11 14.52 1.75 7.25 0.19
## X12 23.23 0.08 6.74 0.13
## X13 28.45 0.98 18.93 0.07
## X14 13.12 0.31 4.84 0.16
## X15 8.90 -0.18 0.62 0.14
Determine change in EDSS from baseline to last follow-up
## select latest EDSS per subject in subjects with 2 or more assessments
a1 <- df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
top_n(1, interval) %>%
ungroup() %>%
dplyr::rename(latest_EDSSatScan = EDSSatScan) %>%
dplyr::select(PatientID, interval, latest_EDSSatScan)
## baseline EDSS
a2 <- df %>%
filter(NoScans >= 2) %>%
filter(interval == 0) %>%
filter(!is.na(EDSSatScan)) %>%
dplyr::rename(EDSSatScan_baseline = EDSSatScan) %>%
dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
#dplyr::rename(GM_vol_baseline = GM_vol) %>%
#dplyr::rename(WM_vol_baseline = WM_vol) %>%
dplyr::rename(WB_vol_baseline = wb_vol_ratio_icv) %>%
dplyr::select(-one_of('interval'))
## calculate change in brain-PAD between baseline and latest brain-PAD
a3 <- right_join(a1, a2, by = c("PatientID")) %>%
mutate(EDSS_change = latest_EDSSatScan - EDSSatScan_baseline)
Mean annualised rates of change in EDSS per group
with(subset(a3, a3$control0rest1 != "control"), describeBy(EDSS_change/interval, subtype, mat = F, digits = 2))
##
## Descriptive statistics by group
## group: control
## NULL
## --------------------------------------------------------
## group: CIS
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 242 -0.26 1.05 0 -0.14 0.3 -6.86 3 9.86 -1.96 8.47
## se
## X1 0.07
## --------------------------------------------------------
## group: RRMS
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 635 0.12 0.45 0 0.1 0.19 -2.24 3.29 5.53 1.09 10.49
## se
## X1 0.02
## --------------------------------------------------------
## group: SPMS
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 104 0.14 0.29 0 0.11 0.07 -0.64 1.26 1.9 0.92 2.25
## se
## X1 0.03
## --------------------------------------------------------
## group: PPMS
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 117 0.36 0.63 0.17 0.27 0.25 -1.01 3 4.01 1.92 5.35
## se
## X1 0.06
Relationship between annualised EDSS change and brain-PAD change
The total number of patients with two or more scans was n = 1155.
delta.df <- join(a3, z3)
with(delta.df, cor.test(EDSS_change, BrainPAD_change, method = "pearson"))
##
## Pearson's product-moment correlation
##
## data: EDSS_change and BrainPAD_change
## t = 9.0089, df = 1095, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2067138 0.3169474
## sample estimates:
## cor
## 0.2626875
with(delta.df[complete.cases(delta.df),], pcor.test(EDSS_change, BrainPAD_change, wb_vol_ratio_icv_change, method = "pearson"))
## estimate p.value statistic n gp Method
## 1 0.08117744 0.01126417 2.539243 975 1 pearson
Historgams of baseline EDSS scores and EDSS changes
plot1 <- ggplot(df.bl, aes(x = EDSSbaseline)) + geom_histogram(binwidth = 1, fill = "dodgerblue", colour = "black") + xlab("EDSS at baseline") + theme_cowplot()
plot2 <- ggplot(delta.df, aes(x = EDSS_change)) + geom_histogram(binwidth = 1, fill = "dodgerblue", colour = "black") + xlab("EDSS change") + theme_cowplot()
plot3 <- ggplot(subset(df.bl, df.bl$control0rest1 == "MS"), aes(x = BrainPAD)) + geom_histogram(binwidth = 1, fill = "darkgoldenrod2", colour = "black") + xlab("Brain-PAD at baseline") + theme_cowplot()
plot4 <- ggplot(subset(delta.df, delta.df$control0rest1 == "MS"), aes(x = BrainPAD_change)) + geom_histogram(binwidth = 1, fill = "darkgoldenrod2", colour = "black") + xlab("Brain-PAD change") + theme_cowplot()
plot5 <- ggplot(delta.df, aes(x = EDSS_change, y = BrainPAD_change)) + geom_point(aes(colour = subtype)) + geom_smooth(method = "lm", colour = "black") + labs(x = "EDSS change", y = "Brain-PAD change") + theme_cowplot() + scale_color_manual(values = ms.palette[-1], name = "MS subtype")
pg <- cowplot::plot_grid(cowplot::plot_grid(plot1, plot2, plot3, plot4, labels = "AUTO", ncol = 2), plot5, ncol = 2, labels = c("", "E"), rel_widths = c(2,1.5))
pg

cowplot::save_plot("~/Work/Brain ageing/Collaborations/MS/plots/histograms_change_EDSS_brain-PAD_plot.pdf", pg, base_asp = 2.5)
Interaction between subtype and EDSS change
fit.change <- lm(BrainPAD_change ~ EDSS_change * subtype, data = delta.df)
anova(fit.change)
## Analysis of Variance Table
##
## Response: BrainPAD_change
## Df Sum Sq Mean Sq F value Pr(>F)
## EDSS_change 1 1452.1 1452.13 82.4760 < 2.2e-16 ***
## subtype 3 212.1 70.68 4.0146 0.007441 **
## EDSS_change:subtype 3 206.0 68.68 3.9008 0.008702 **
## Residuals 1089 19173.7 17.61
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Covarying for change in normalised brain volumes
fit.change2 <- lm(BrainPAD_change ~ EDSS_change * subtype + wb_vol_ratio_icv_change, data = delta.df)
anova(fit.change2)
## Analysis of Variance Table
##
## Response: BrainPAD_change
## Df Sum Sq Mean Sq F value Pr(>F)
## EDSS_change 1 1452.1 1452.1 117.0349 < 2.2e-16 ***
## subtype 3 212.1 70.7 5.6968 0.0007184 ***
## wb_vol_ratio_icv_change 1 5769.5 5769.5 464.9914 < 2.2e-16 ***
## EDSS_change:subtype 3 110.7 36.9 2.9750 0.0307508 *
## Residuals 1088 13499.6 12.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Use jtools package to get slopes from the model, per subtype.
sim_slopes(fit.change, pred = "EDSS_change", modx = "subtype", johnson_neyman = F, digits = 4)
## SIMPLE SLOPES ANALYSIS
##
## Slope of EDSS_change when subtype = CIS:
## Est. S.E. t val. p
## 0.8434 0.2189 3.8524 0.0001
##
## Slope of EDSS_change when subtype = RRMS:
## Est. S.E. t val. p
## 1.2534 0.1459 8.5883 0.0000
##
## Slope of EDSS_change when subtype = SPMS:
## Est. S.E. t val. p
## -0.6953 0.6510 -1.0680 0.2857
##
## Slope of EDSS_change when subtype = PPMS:
## Est. S.E. t val. p
## 0.5882 0.3464 1.6983 0.0897
interact_plot(fit.change, pred = "EDSS_change", modx = "subtype", plot.points = T, interval = T, facet.modx = T, x.label = "EDSS annualised change", y.label = "Brain-PAD annualised change", modx.labels = c("CIS", "RRMS", "SPMS", "PPMS")) + geom_hline(yintercept = 0, lty = 2) + theme_cowplot() +
scale_fill_manual(values = ms.palette[-1], name = "MS subtype") +
scale_color_manual(values = ms.palette[-1], name = "MS subtype")

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/change_EDSS_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
Correlate baseline brain-PAD with the number of follow-up scans completed in the n=104 with >1 scan.
with(subset(df.bl, df.bl$NoScans > 1 & df.bl$subtype == "SPMS"), cor.test(BrainPAD, NoScans, method = "spearman"))
## Warning in cor.test.default(BrainPAD, NoScans, method = "spearman"): Cannot
## compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: BrainPAD and NoScans
## S = 241780, p-value = 0.002848
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.289771
Longitudinal brain-predicted age trajectories
Interaction between group and time MS vs. controls
Conditional growth model - random effects of participant and cohort
model_int.group <- lmer(BrainPAD ~ control0rest1 * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = df, control = lmerControl(optimizer = "Nelder_Mead"))
round(anova(model_int.group)["control0rest1:interval",],3)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## control0rest1:interval 27.695 27.695 1 1325.5 5.845 0.016 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_int.group)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ control0rest1 * interval + poly(age_at_baseline_scan1,
## 2) + gender + field_strength + (interval | PatientID) + (1 |
## Cohort)
## Data: df
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 21373.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.7410 -0.3578 -0.0138 0.3453 8.6034
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## PatientID (Intercept) 82.8868 9.1042
## interval 0.4612 0.6791 -0.06
## Cohort (Intercept) 15.2072 3.8996
## Residual 4.7381 2.1767
## Number of obs: 3564, groups: PatientID, 1354; Cohort, 15
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 0.05128 1.44705 38.42067 0.035
## control0rest1MS 12.14169 1.02786 1258.53795 11.813
## interval -0.16552 0.14960 1374.14620 -1.106
## poly(age_at_baseline_scan1, 2)1 -59.19725 16.33371 1335.78018 -3.624
## poly(age_at_baseline_scan1, 2)2 -33.79256 14.93286 1340.43263 -2.263
## gendermale 1.90177 0.52626 1337.05171 3.614
## field_strength3T -3.25935 0.99706 347.05623 -3.269
## control0rest1MS:interval 0.37219 0.15394 1325.50262 2.418
## Pr(>|t|)
## (Intercept) 0.971912
## control0rest1MS < 2e-16 ***
## interval 0.268735
## poly(age_at_baseline_scan1, 2)1 0.000301 ***
## poly(age_at_baseline_scan1, 2)2 0.023797 *
## gendermale 0.000313 ***
## field_strength3T 0.001187 **
## control0rest1MS:interval 0.015754 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cn01MS intrvl p(___1,2)1 p(___1,2)2 gndrml fld_3T
## cntrl0rs1MS -0.613
## interval -0.066 0.093
## pl(___1,2)1 0.072 -0.159 -0.001
## pl(___1,2)2 0.026 -0.055 0.003 0.026
## gendermale -0.147 0.027 0.003 0.027 0.059
## fld_strng3T -0.275 0.024 0.002 0.016 -0.027 -0.041
## cntrl0r1MS: 0.063 -0.098 -0.972 0.001 -0.003 -0.003 0.000
Use sjPlots to visualise the interactions, using estimated marginal means
plot_model(model_int.group, type = "emm", terms = c("interval [all]", "control0rest1"), colors = c("darkgreen", "firebrick"), legend.title = "Group", grid = F, show.data = F) + theme_cowplot() + labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") + geom_hline(yintercept = 0, lty = 2)
Generate estimate marginal trends for healthy controls and for all MS/CIS combined, using annualised difference between baseline and final follow-up.
# emtrends(object = model_int.group, pairwise ~ control0rest1, var = "interval")
Interaction between group and time MS subtypes
Conditional growth model - random effects of participant and cohort
model_int.subtype <- lmer(BrainPAD ~ subtype * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$subtype != "control"), control = lmerControl(optimizer = "Nelder_Mead"))
round(anova(model_int.subtype)["subtype:interval",],3)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## subtype:interval 78.165 26.055 3 845.79 4.987 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Generate EMMs for healthy controls and for MS subtypes, using annualised difference between baseline and final follow-up.
# emtrends(object = model_int.subtype, pairwise ~ subtype, var = "interval")
plot_model(model_int.subtype, type = "emm", terms = c("interval [all]", "subtype"), legend.title = "Group", show.data = F, grid = F) + theme_cowplot() + labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") + geom_hline(yintercept = 0, lty = 2)

Longitudinal brain-PAD by interval plots
Randomly sub-sampling 50% of the data.
sub.sample.df <- df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
sample_frac(0.5) %>%
ungroup()
ggplot(data = sub.sample.df, aes(x = interval, y = BrainPAD, fill = subtype)) +
geom_hline(yintercept = 0, lty = 2) +
geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.25) +
geom_point(aes(colour = subtype), size = 0.5, alpha = 0.5) +
labs(x = "Time (years)", y = "Brain-predicted age difference (years)") +
scale_fill_manual(values = ms.palette) +
scale_color_manual(values = ms.palette) +
# facet_grid(Cohort) +
theme_cowplot() + theme(legend.position = "none")
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/longitudinal_brain-PAD_time_plot.pdf", height = 6, width = 6, useDingbats = FALSE)
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
ggplot(data = df, aes(x = age_at_scan, y = BrainPAD, fill = subtype)) +
geom_hline(yintercept = 0, lty = 2) +
geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.2) +
# geom_smooth(method = "lm", aes(col = subtype)) +
labs(x = "Time (years)", y = "Brain-predicted age difference (years)") +
scale_fill_manual(values = ms.palette) +
scale_color_manual(values = ms.palette) +
facet_wrap(~ subtype, scales = "free_x") +
theme_cowplot() + theme(legend.position = "none")
## Warning: Removed 1 rows containing missing values (geom_path).

Age at diagnosis and longitudinal brain-PAD
model_int.onset <- lmer(BrainPAD ~ disease_onset_age * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$control0rest1 == "MS"), control = lmerControl(optimizer = "Nelder_Mead"))
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
anova(model_int.onset)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value
## disease_onset_age 937.33 937.33 1 1035.25 175.4335
## interval 37.98 37.98 1 696.12 7.1079
## poly(age_at_baseline_scan1, 2) 346.90 173.45 2 1044.47 32.4633
## gender 65.08 65.08 1 1127.12 12.1810
## field_strength 26.03 26.03 1 179.47 4.8725
## disease_onset_age:interval 5.65 5.65 1 693.35 1.0572
## Pr(>F)
## disease_onset_age < 2.2e-16 ***
## interval 0.0078525 **
## poly(age_at_baseline_scan1, 2) 2.1e-14 ***
## gender 0.0005014 ***
## field_strength 0.0285553 *
## disease_onset_age:interval 0.3042026
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Time since diagnosis and longitudinal brain-PAD
model_int.duration <- lmer(BrainPAD ~ disease_duration * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$control0rest1 == "MS"), control = lmerControl(optimizer = "Nelder_Mead"))
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
anova(model_int.duration)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value
## disease_duration 933.97 933.97 1 1013.59 179.9523
## interval 331.67 331.67 1 526.36 63.9038
## poly(age_at_baseline_scan1, 2) 462.49 231.25 2 1076.16 44.5554
## gender 61.60 61.60 1 1085.24 11.8684
## field_strength 23.44 23.44 1 171.50 4.5165
## disease_duration:interval 172.66 172.66 1 747.76 33.2681
## Pr(>F)
## disease_duration < 2.2e-16 ***
## interval 8.330e-15 ***
## poly(age_at_baseline_scan1, 2) < 2.2e-16 ***
## gender 0.0005928 ***
## field_strength 0.0350017 *
## disease_duration:interval 1.175e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
onset.plt <- plot_model(model_int.onset, type = "pred", terms = c("interval [all]", "disease_onset_age[20,30,40,50,60]"), legend.title = "Onset age (years)", show.data = F, grid = F) +
geom_hline(yintercept = 0, lty = 2) +
labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") +
theme_cowplot()
duration.plt <- plot_model(model_int.duration, type = "pred", terms = c("interval [all]", "disease_duration[0,7.5,15]"), legend.title = "Time since diagnosis (years)", show.data = F, grid = F) +
geom_hline(yintercept = 0, lty = 2) +
labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") +
theme_cowplot()
cowplot::plot_grid(onset.plt, duration.plt, nrow = 1)

Plot(s) with age and estimated age for each individual from each group and site
ggplot(data = df, aes(x = age_at_scan, y = BrainAge, fill = subtype)) +
geom_abline(slope = 1, lty = 2) +
geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.25) +
geom_point(aes(colour = subtype), size = 0.5, alpha = 0.5) +
labs(x = "Age (years)", y = "Brain-predicted age (years)") +
scale_fill_manual(values = ms.palette) +
scale_color_manual(values = ms.palette) +
facet_wrap(~ Cohort) +
theme_cowplot() + theme(legend.position = "none")

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/longitudinal_brain-age_years_plot.pdf", height = 6, width = 10, useDingbats = FALSE)